Contexto

Análise

Leitura do conjunto de dados

df <- readxl::read_excel('./mobile_app_user_dataset_1.xlsx')[-1,]

Exploração

Uma vez que é uma pesquisa sobre mobile devices, veremos qual a proporção de pessoas que de fato possuem um device.


df %>% 
  select(Q2) %>% 
  drop_na() %>% 
  mutate(
    Q2 = case_when(
      Q2 == 1 ~ "Possui",
      Q2 != 1 ~ "Não Possui"
    )
  ) %>% 
  ggplot(
    aes(
      y = Q2,
      fill = Q2
    )
  ) +
  geom_bar(
    position = "dodge",
  ) +
  geom_label(
      stat = 'count',
      aes(
        label = ..count..,
      ),
      color = 'white',
      show.legend = FALSE
  ) + 
  labs(
    y = "",
    x = "Total de Pessoas",
    title = "Distribuição de pessoas que tem ou não celular",
    subtitle = "Dados oriundos da pesquisa realizada pela Harvard"
  ) +
  scale_fill_manual(
    values = c( "#C4161C", "#009491"),
  ) +
  theme_classic() +
  theme(
    axis.title.x = element_text(vjust=-.2, size=11),
    legend.title = element_blank()
  ) 



Podemos visualizar que a grande parte das pessoas possuem celular.


A proporção de pessoas que não tem é de 12%.


Visualizaremos agora os diferentes tipos de dispositivos utilizados pelos usuários que possuem celular


phone_format <- function (phone_list, phone_type, apply_function = function(param) param) {
  phone_dic = list(
    apple = c("apple", "iphone", "ipad", "aple", "appale", "ipod", "aplle", "i-phone", "ipone", "applke", "applr", "appme", "iphon"),
    blackberry = c("blackberry", "blackb", "blackeb", "baclkberry", "blakckberry", "blacberry", "blakberry", "blackerry", "bleckberry"),
    samsung = c("samsung", "samsumg", "sansung", "sumsung", "samsug", "samsun",  "samgung", "samsing", "samung", "sansug", "samasung", "samsang", "samsong", "sumsang", "galaxynote"),
    null = c("\\?", "9000", "930p"),
    sony_ericsson = c("sony-", "sonyer", "sony", "erison"),
    nokia = c("nokia", "nokya"),
    asus = c("asus"),
    acer = c("acer")
  ) 
  
  
  str_detect(phone_list, paste(phone_dic[[phone_type]], collapse = "|")) ~ apply_function(phone_type)
}

phones <- df %>%
  filter(Q2 == 1) %>% 
  select(Q3_1_TEXT) %>% 
  drop_na() %>% 
  mutate(
    Q3 = str_replace(str_to_lower(Q3_1_TEXT), " ", "")
  ) %>% 
  mutate(
    Q3 = case_when(
      phone_format(Q3, "apple"),
      phone_format(Q3, "samsung"),
      phone_format(Q3, "blackberry"),
      phone_format(Q3, "null"),
      phone_format(Q3, "sony_ericsson", function(param) str_replace(param, "_", " ")),
      TRUE ~ Q3
    )
  ) %>% 
  group_by(Q3) %>% 
  count() %>%
  arrange(desc(n)) 

phones
length(names(df))
[1] 161
df

q5_answers <- data.frame(
  row.names = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
  val = c("Never", "Less than once a month ", "Once a month", "More than once a month", "Once a week", "More than once a week", "Once a day", "Several times a day", "Other")
)

df %>% 
  select(Q5) %>% 
  drop_na() %>% 
  mutate(
    Q5_TEXT = qr_answers[Q5, ]
  ) %>% 
  group_by(Q5_TEXT) %>% 
  count() %>% 
  arrange(desc(n)) %>% 
  ggplot(
    aes(
      x = reorder(Q5_TEXT, -n),
      y = n,
      fill = Q5_TEXT
    )
  ) +
  geom_col() +
  labs(
    x = 'Frequencia de abertura da loja de aplicativos',
    y = 'Total'
  ) +
  theme(
    axis.text.x = element_text(angle = 45, vjust=.6),
  )

NA

q6_answers <- data.frame(
  row.names = c(1, 2, 3, 4, 5, 6),
  val = c("0 - 1", "2 - 5", "6 - 10", "11 - 20", "21 - 30", "Mais de 30")
)


df %>% 
  select(Q6) %>% 
  drop_na() %>% 
  mutate(
    Q6_TEXT = q6_answers[Q6,]
  ) %>% 
  group_by(Q6_TEXT) %>% 
  count() %>% 
  ggplot(
    aes(
      x = reorder(Q6_TEXT, -n),
      y = n,
      fill = Q6_TEXT
    )
  ) +
  geom_col() +
  labs(
    x = 'Quantidade de aplicativos baixados por mês',
    y = 'Total',
  ) +
  theme_classic() +
  labs(
    fill = 'Frequência'
  ) +
  theme(
    axis.text.x = element_text(vjust = -1),
    axis.title.x = element_text(vjust = -1),
  )

Vamos ver se a galera que mais baixa é a galera que mais acessa a loja


df %>% 
  select(Q5, Q6) %>% 
  drop_na() %>% 
  mutate(
    Q5 = q5_answers[Q5,],
    Q6 = q6_answers[Q6, ]
  ) %>% 
  group_by(Q5, Q6) %>% 
    count() %>% 
  arrange(desc(n)) %>% 
  ggplot(
    aes(
      y = reorder(Q5, -n),
      x = n,
      fill = Q6
    )
  ) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 1)
  ) +
  geom_label(
    aes(
      label = n,
    ),
    size = 3
  ) +
  labs(
    y = 'Frequência de acesso à loja de aplicativos',
    x = 'Total'
  ) +
  theme(
    axis.text.x = element_text(angle = 20, vjust = 0.5),
  ) +
  facet_grid(rows = 'Q6')

LS0tCnRpdGxlOiAiV29ybGR3aWRlIE1vYmlsZSBBcHAgVXNlciBCZWhhdmlvciBEYXRhc2V0IgphdXRob3I6ICJEaWVnbyIKZGF0ZTogIjIvMjIvMjAyMiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0UsIGVjaG8gPSBGQUxTRSwgZWNobyA9IEZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoY2FjaGUgPSBUUlVFLCBjb2xsYXBzZSA9IFRSVUUpCmxpYnJhcnkocmVhZHIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHN0cmluZ3IpCmBgYAoKIyMgQ29udGV4dG8KCi4uLgoKIyMgQW7DoWxpc2UKCiMjIyBMZWl0dXJhIGRvIGNvbmp1bnRvIGRlIGRhZG9zCgpgYGB7cn0KZGYgPC0gcmVhZHhsOjpyZWFkX2V4Y2VsKCcuL21vYmlsZV9hcHBfdXNlcl9kYXRhc2V0XzEueGxzeCcpWy0xLF0KYGBgCgojIyMgRXhwbG9yYcOnw6NvCgo+IFVtYSB2ZXogcXVlIMOpIHVtYSBwZXNxdWlzYSBzb2JyZSAqbW9iaWxlIGRldmljZXMqLCB2ZXJlbW9zIHF1YWwgYSBwcm9wb3LDp8OjbyBkZSBwZXNzb2FzIHF1ZSBkZSBmYXRvIHBvc3N1ZW0gdW0gKmRldmljZSouCgpgYGB7cn0KCmRmICU+JSAKICBzZWxlY3QoUTIpICU+JSAKICBkcm9wX25hKCkgJT4lIAogIG11dGF0ZSgKICAgIFEyID0gY2FzZV93aGVuKAogICAgICBRMiA9PSAxIH4gIlBvc3N1aSIsCiAgICAgIFEyICE9IDEgfiAiTsOjbyBQb3NzdWkiCiAgICApCiAgKSAlPiUgCiAgZ2dwbG90KAogICAgYWVzKAogICAgICB5ID0gUTIsCiAgICAgIGZpbGwgPSBRMgogICAgKQogICkgKwogIGdlb21fYmFyKAogICAgcG9zaXRpb24gPSAiZG9kZ2UiLAogICkgKwogIGdlb21fbGFiZWwoCiAgICAgIHN0YXQgPSAnY291bnQnLAogICAgICBhZXMoCiAgICAgICAgbGFiZWwgPSAuLmNvdW50Li4sCiAgICAgICksCiAgICAgIGNvbG9yID0gJ3doaXRlJywKICAgICAgc2hvdy5sZWdlbmQgPSBGQUxTRQogICkgKyAKICBsYWJzKAogICAgeSA9ICIiLAogICAgeCA9ICJUb3RhbCBkZSBQZXNzb2FzIiwKICAgIHRpdGxlID0gIkRpc3RyaWJ1acOnw6NvIGRlIHBlc3NvYXMgcXVlIHRlbSBvdSBuw6NvIGNlbHVsYXIiLAogICAgc3VidGl0bGUgPSAiRGFkb3Mgb3JpdW5kb3MgZGEgcGVzcXVpc2EgcmVhbGl6YWRhIHBlbGEgSGFydmFyZCIKICApICsKICBzY2FsZV9maWxsX21hbnVhbCgKICAgIHZhbHVlcyA9IGMoICIjQzQxNjFDIiwgIiMwMDk0OTEiKSwKICApICsKICB0aGVtZV9jbGFzc2ljKCkgKwogIHRoZW1lKAogICAgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHZqdXN0PS0uMiwgc2l6ZT0xMSksCiAgICBsZWdlbmQudGl0bGUgPSBlbGVtZW50X2JsYW5rKCkKICApIAoKYGBgCjxiciAvPgo8YnIgLz4KCj4gUG9kZW1vcyB2aXN1YWxpemFyIHF1ZSBhIGdyYW5kZSBwYXJ0ZSBkYXMgcGVzc29hcyBwb3NzdWVtIGNlbHVsYXIuCgo8YnIgLz4KCj4gQSBwcm9wb3LDp8OjbyBkZSBwZXNzb2FzIHF1ZSBuw6NvIHRlbSDDqSBkZSBgciBzY2FsZXM6OnBlcmNlbnQocm91bmQoMTIwOC8xMDIwMCwgMikpYC4KCjxiciAvPgoKPiBWaXN1YWxpemFyZW1vcyBhZ29yYSBvcyBkaWZlcmVudGVzIHRpcG9zIGRlIGRpc3Bvc2l0aXZvcyB1dGlsaXphZG9zIHBlbG9zIHVzdcOhcmlvcyBxdWUgcG9zc3VlbSBjZWx1bGFyIAo+IAoKCmBgYHtyfQoKcGhvbmVfZm9ybWF0IDwtIGZ1bmN0aW9uIChwaG9uZV9saXN0LCBwaG9uZV90eXBlLCBhcHBseV9mdW5jdGlvbiA9IGZ1bmN0aW9uKHBhcmFtKSBwYXJhbSkgewogIHBob25lX2RpYyA9IGxpc3QoCiAgICBhcHBsZSA9IGMoImFwcGxlIiwgImlwaG9uZSIsICJpcGFkIiwgImFwbGUiLCAiYXBwYWxlIiwgImlwb2QiLCAiYXBsbGUiLCAiaS1waG9uZSIsICJpcG9uZSIsICJhcHBsa2UiLCAiYXBwbHIiLCAiYXBwbWUiLCAiaXBob24iKSwKICAgIGJsYWNrYmVycnkgPSBjKCJibGFja2JlcnJ5IiwgImJsYWNrYiIsICJibGFja2ViIiwgImJhY2xrYmVycnkiLCAiYmxha2NrYmVycnkiLCAiYmxhY2JlcnJ5IiwgImJsYWtiZXJyeSIsICJibGFja2VycnkiLCAiYmxlY2tiZXJyeSIpLAogICAgc2Ftc3VuZyA9IGMoInNhbXN1bmciLCAic2Ftc3VtZyIsICJzYW5zdW5nIiwgInN1bXN1bmciLCAic2Ftc3VnIiwgInNhbXN1biIsICAic2FtZ3VuZyIsICJzYW1zaW5nIiwgInNhbXVuZyIsICJzYW5zdWciLCAic2FtYXN1bmciLCAic2Ftc2FuZyIsICJzYW1zb25nIiwgInN1bXNhbmciLCAiZ2FsYXh5bm90ZSIpLAogICAgbnVsbCA9IGMoIlxcPyIsICI5MDAwIiwgIjkzMHAiKSwKICAgIHNvbnlfZXJpY3Nzb24gPSBjKCJzb255LSIsICJzb255ZXIiLCAic29ueSIsICJlcmlzb24iKSwKICAgIG5va2lhID0gYygibm9raWEiLCAibm9reWEiKSwKICAgIGFzdXMgPSBjKCJhc3VzIiksCiAgICBhY2VyID0gYygiYWNlciIpCiAgKSAKICAKICAKICBzdHJfZGV0ZWN0KHBob25lX2xpc3QsIHBhc3RlKHBob25lX2RpY1tbcGhvbmVfdHlwZV1dLCBjb2xsYXBzZSA9ICJ8IikpIH4gYXBwbHlfZnVuY3Rpb24ocGhvbmVfdHlwZSkKfQoKcGhvbmVzIDwtIGRmICU+JQogIGZpbHRlcihRMiA9PSAxKSAlPiUgCiAgc2VsZWN0KFEzXzFfVEVYVCkgJT4lIAogIGRyb3BfbmEoKSAlPiUgCiAgbXV0YXRlKAogICAgUTMgPSBzdHJfcmVwbGFjZShzdHJfdG9fbG93ZXIoUTNfMV9URVhUKSwgIiAiLCAiIikKICApICU+JSAKICBtdXRhdGUoCiAgICBRMyA9IGNhc2Vfd2hlbigKICAgICAgcGhvbmVfZm9ybWF0KFEzLCAiYXBwbGUiKSwKICAgICAgcGhvbmVfZm9ybWF0KFEzLCAic2Ftc3VuZyIpLAogICAgICBwaG9uZV9mb3JtYXQoUTMsICJibGFja2JlcnJ5IiksCiAgICAgIHBob25lX2Zvcm1hdChRMywgIm51bGwiKSwKICAgICAgcGhvbmVfZm9ybWF0KFEzLCAic29ueV9lcmljc3NvbiIsIGZ1bmN0aW9uKHBhcmFtKSBzdHJfcmVwbGFjZShwYXJhbSwgIl8iLCAiICIpKSwKICAgICAgVFJVRSB+IFEzCiAgICApCiAgKSAlPiUgCiAgZ3JvdXBfYnkoUTMpICU+JSAKICBjb3VudCgpICU+JQogIGFycmFuZ2UoZGVzYyhuKSkgCgpwaG9uZXMKYGBgCgpgYGB7cn0KbGVuZ3RoKG5hbWVzKGRmKSkKYGBgCgpgYGB7cn0KZGYKYGBgCgoKCmBgYHtyfQoKcTVfYW5zd2VycyA8LSBkYXRhLmZyYW1lKAogIHJvdy5uYW1lcyA9IGMoMSwgMiwgMywgNCwgNSwgNiwgNywgOCwgOSksCiAgdmFsID0gYygiTmV2ZXIiLCAiTGVzcyB0aGFuIG9uY2UgYSBtb250aCAiLCAiT25jZSBhIG1vbnRoIiwgIk1vcmUgdGhhbiBvbmNlIGEgbW9udGgiLCAiT25jZSBhIHdlZWsiLCAiTW9yZSB0aGFuIG9uY2UgYSB3ZWVrIiwgIk9uY2UgYSBkYXkiLCAiU2V2ZXJhbCB0aW1lcyBhIGRheSIsICJPdGhlciIpCikKCmRmICU+JSAKICBzZWxlY3QoUTUpICU+JSAKICBkcm9wX25hKCkgJT4lIAogIG11dGF0ZSgKICAgIFE1X1RFWFQgPSBxcl9hbnN3ZXJzW1E1LCBdCiAgKSAlPiUgCiAgZ3JvdXBfYnkoUTVfVEVYVCkgJT4lIAogIGNvdW50KCkgJT4lIAogIGFycmFuZ2UoZGVzYyhuKSkgJT4lIAogIGdncGxvdCgKICAgIGFlcygKICAgICAgeCA9IHJlb3JkZXIoUTVfVEVYVCwgLW4pLAogICAgICB5ID0gbiwKICAgICAgZmlsbCA9IFE1X1RFWFQKICAgICkKICApICsKICBnZW9tX2NvbCgpICsKICBsYWJzKAogICAgeCA9ICdGcmVxdWVuY2lhIGRlIGFiZXJ0dXJhIGRhIGxvamEgZGUgYXBsaWNhdGl2b3MnLAogICAgeSA9ICdUb3RhbCcKICApICsKICB0aGVtZSgKICAgIGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIHZqdXN0PS42KSwKICApCiAgCmBgYAoKCmBgYHtyfQoKcTZfYW5zd2VycyA8LSBkYXRhLmZyYW1lKAogIHJvdy5uYW1lcyA9IGMoMSwgMiwgMywgNCwgNSwgNiksCiAgdmFsID0gYygiMCAtIDEiLCAiMiAtIDUiLCAiNiAtIDEwIiwgIjExIC0gMjAiLCAiMjEgLSAzMCIsICJNYWlzIGRlIDMwIikKKQoKCmRmICU+JSAKICBzZWxlY3QoUTYpICU+JSAKICBkcm9wX25hKCkgJT4lIAogIG11dGF0ZSgKICAgIFE2X1RFWFQgPSBxNl9hbnN3ZXJzW1E2LF0KICApICU+JSAKICBncm91cF9ieShRNl9URVhUKSAlPiUgCiAgY291bnQoKSAlPiUgCiAgZ2dwbG90KAogICAgYWVzKAogICAgICB4ID0gcmVvcmRlcihRNl9URVhULCAtbiksCiAgICAgIHkgPSBuLAogICAgICBmaWxsID0gUTZfVEVYVAogICAgKQogICkgKwogIGdlb21fY29sKCkgKwogIGxhYnMoCiAgICB4ID0gJ1F1YW50aWRhZGUgZGUgYXBsaWNhdGl2b3MgYmFpeGFkb3MgcG9yIG3DqnMnLAogICAgeSA9ICdUb3RhbCcsCiAgKSArCiAgdGhlbWVfY2xhc3NpYygpICsKICBsYWJzKAogICAgZmlsbCA9ICdGcmVxdcOqbmNpYScKICApICsKICB0aGVtZSgKICAgIGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KHZqdXN0ID0gLTEpLAogICAgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHZqdXN0ID0gLTEpLAogICkKCmBgYAoKVmFtb3MgdmVyIHNlIGEgZ2FsZXJhIHF1ZSBtYWlzIGJhaXhhIMOpIGEgZ2FsZXJhIHF1ZSBtYWlzIGFjZXNzYSBhIGxvamEKCmBgYHtyLCBmaWcuaGVpZ2h0ID0gMTB9CgpkZiAlPiUgCiAgc2VsZWN0KFE1LCBRNikgJT4lIAogIGRyb3BfbmEoKSAlPiUgCiAgbXV0YXRlKAogICAgUTUgPSBxNV9hbnN3ZXJzW1E1LF0sCiAgICBRNiA9IHE2X2Fuc3dlcnNbUTYsIF0KICApICU+JSAKICBncm91cF9ieShRNSwgUTYpICU+JSAKICAgIGNvdW50KCkgJT4lIAogIGFycmFuZ2UoZGVzYyhuKSkgJT4lIAogIGdncGxvdCgKICAgIGFlcygKICAgICAgeSA9IHJlb3JkZXIoUTUsIC1uKSwKICAgICAgeCA9IG4sCiAgICAgIGZpbGwgPSBRNgogICAgKQogICkgKwogIGdlb21fYmFyKAogICAgc3RhdCA9ICJpZGVudGl0eSIsCiAgICBwb3NpdGlvbiA9IHBvc2l0aW9uX2RvZGdlKHdpZHRoID0gMSkKICApICsKICBnZW9tX2xhYmVsKAogICAgYWVzKAogICAgICBsYWJlbCA9IG4sCiAgICApLAogICAgc2l6ZSA9IDMKICApICsKICBsYWJzKAogICAgeSA9ICdGcmVxdcOqbmNpYSBkZSBhY2Vzc28gw6AgbG9qYSBkZSBhcGxpY2F0aXZvcycsCiAgICB4ID0gJ1RvdGFsJwogICkgKwogIHRoZW1lKAogICAgYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSAyMCwgdmp1c3QgPSAwLjUpLAogICkgKwogIGZhY2V0X2dyaWQocm93cyA9ICdRNicpCgpgYGAKCgpgYGB7cn0KCmRmICU+JSAKICBzZWxlY3QoUTdfVEVYVCkKCmBgYAoK